2024 Daiichi DS-1062a

bulk RNA-seq

library(dplyr)
library(ggplot2)
library(DT)
dir = "~/Desktop/DF/DFCI_Paweletz/2024_Daiichi_bulk_RNAseq/"
count_TPM_matrix_all = readRDS(paste0(dir,"raw_data/30342/star30342_rawData.rds"))
counts = count_TPM_matrix_all$count

# rename column names 
colnames(counts)= sub("_Count", "", colnames(counts))
rownames(counts) = counts$SYMBOL
counts = counts[,-1]

# Remove genes with no expression across samples 
counts = counts[rowSums(counts) !=0,]
# counts %>% nrow() # 22845
tpms = count_TPM_matrix_all$TPM

# rename column names 
colnames(tpms)= sub("_TPM", "", colnames(tpms))
rownames(tpms) = tpms$SYMBOL
tpms = tpms[,-1]

# match the rownames with counts 
tpms = tpms[rownames(counts),]
# tpms %>% nrow() # 22845

DEG analysis

List

  • Dxd/DMSO
  • IgG-Dxd/DMSO
  • DS-1062a/DMSO

DxD vs DMSO

library(DESeq2)
count.mtx = counts
cont = "DMSO"
tret = "DXD"
cols = c(c(1:3), c(4:6))
cond = c(rep(cont,3),
         rep(tret,3))
count.mtx = count.mtx[, cols]
# Generate info table
info <- data.frame(matrix(nrow = ncol(count.mtx), ncol = 2))
colnames(info) <- c('sample', 'cond')
info$sample <- colnames(count.mtx)
info$cond <- cond

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
res <- results(dds)

res <- data.frame(res)
res1 = res

Volcano plot (version1)

fc = 1.5
pval = 0.05

res = res %>% mutate(DE=ifelse(log2FoldChange >= log2(fc) & pvalue < pval, 'UP',
                               ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, 'DN','no_sig')))
res$DE = factor(res$DE, levels = c('UP','DN','no_sig'))

res %>% 
  ggplot(aes(log2FoldChange, -log10(pvalue))) + 
  geom_point(size=1, alpha=0.5) + 
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_classic() +
  geom_vline(xintercept = c(-log2(fc),log2(fc)), color='grey') +
  geom_hline(yintercept = -log10(0.05),color='grey') +
  xlim(c(-10,10)) +ylim(c(0,150))

Volcano plot (version2)

up = nrow(res[res$DE == "UP", ])
dn = nrow(res[res$DE == "DN", ])
res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(pval), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  xlim(c(-10,10)) +ylim(c(0,150)) +
  theme_bw() 

Volcano plot (version3)

res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() +
  xlim(c(-10,10)) +ylim(c(0,150)) + scale_y_sqrt()

DEG table

res %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
library(clusterProfiler)
hallmark <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, gene_symbol)

perform_GSEA <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$log2FoldChange
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  set.seed(123)
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  result <- x@result %>% arrange(desc(NES))
  result <- result[, c('NES', 'pvalue', 'p.adjust', 'core_enrichment', 'ID')]
  return(result)
}

# Application 
gsea.res = perform_GSEA(res = res, ref = hallmark) 

filtered_gsea = gsea.res %>% mutate(sig= ifelse(pvalue <= 0.05,"p value <= 0.05", "p value > 0.05"))

# Modified GSEA NES plot 
gsea_nes_plot =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=sig), color="grey1", size=0.2) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title= "GSEA") + 
    theme_classic() +
    # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
    scale_fill_manual(values = c("#FF0000","grey88")) +
    theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
          axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
          axis.title =element_text(size=10)) +ggtitle(title)
}

# Modified GSEA NES plot (p value color version) 
gsea_nes_plot_pval =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
  geom_col(aes(fill=-pvalue), color="grey1", size=0.2) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title= "GSEA") + 
  theme_classic() +
  # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
  scale_fill_gradient(low = "grey88",high ="#FF0000" ) +
  theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
        axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
        axis.title =element_text(size=10),
        legend.title = element_blank()) +ggtitle(title)
}

GSEA NES Plot 1

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot_pval(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

GSEA NES Plot 2

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

IgG vs DMSO

library(DESeq2)
count.mtx = counts
cont = "DMSO"
tret = "IgG"
cols = c(c(1:3), c(7:9))
cond = c(rep(cont,3),
         rep(tret,3))
count.mtx = count.mtx[, cols]
# Generate info table
info <- data.frame(matrix(nrow = ncol(count.mtx), ncol = 2))
colnames(info) <- c('sample', 'cond')
info$sample <- colnames(count.mtx)
info$cond <- cond

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
res <- results(dds)

res <- data.frame(res)
res2 = res

Volcano plot (version1)

fc = 1.5
pval = 0.05

res = res %>% mutate(DE=ifelse(log2FoldChange >= log2(fc) & pvalue < pval, 'UP',
                               ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, 'DN','no_sig')))
res$DE = factor(res$DE, levels = c('UP','DN','no_sig'))

res %>% 
  ggplot(aes(log2FoldChange, -log10(pvalue))) + 
  geom_point(size=1, alpha=0.5) + 
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_classic() +
  geom_vline(xintercept = c(-log2(fc),log2(fc)), color='grey') +
  geom_hline(yintercept = -log10(0.05),color='grey') +
  xlim(c(-10,10)) +ylim(c(0,150))

Volcano plot (version2)

up = nrow(res[res$DE == "UP", ])
dn = nrow(res[res$DE == "DN", ])
res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(pval), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  xlim(c(-10,10)) +ylim(c(0,150)) +
  theme_bw() 

Volcano plot (version3)

res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() +
  xlim(c(-10,10)) +ylim(c(0,150)) + scale_y_sqrt()

DEG table

res %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
library(clusterProfiler)
hallmark <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, gene_symbol)

perform_GSEA <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$log2FoldChange
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  set.seed(123)
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  result <- x@result %>% arrange(desc(NES))
  result <- result[, c('NES', 'pvalue', 'p.adjust', 'core_enrichment', 'ID')]
  return(result)
}

# Application 
gsea.res = perform_GSEA(res = res, ref = hallmark) 

filtered_gsea = gsea.res %>% mutate(sig= ifelse(pvalue <= 0.05,"p value <= 0.05", "p value > 0.05"))

# Modified GSEA NES plot 
gsea_nes_plot =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=sig), color="grey1", size=0.2) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title= "GSEA") + 
    theme_classic() +
    # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
    scale_fill_manual(values = c("#FF0000","grey88")) +
    theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
          axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
          axis.title =element_text(size=10)) +ggtitle(title)
}

# Modified GSEA NES plot (p value color version) 
gsea_nes_plot_pval =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
  geom_col(aes(fill=-pvalue), color="grey1", size=0.2) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title= "GSEA") + 
  theme_classic() +
  # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
  scale_fill_gradient(low = "grey88",high ="#FF0000" ) +
  theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
        axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
        axis.title =element_text(size=10),
        legend.title = element_blank()) +ggtitle(title)
}

GSEA NES Plot 1

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot_pval(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

GSEA NES Plot 2

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

DS-1062a vs DMSO

library(DESeq2)
count.mtx = counts
cont = "DMSO"
tret = "DS1062a"
cols = c(c(1:3), c(10:12))
cond = c(rep(cont,3),
         rep(tret,3))
count.mtx = count.mtx[, cols]
# Generate info table
info <- data.frame(matrix(nrow = ncol(count.mtx), ncol = 2))
colnames(info) <- c('sample', 'cond')
info$sample <- colnames(count.mtx)
info$cond <- cond

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
res <- results(dds)

res <- data.frame(res)
res3 =res

Volcano plot (version1)

fc = 1.5
pval = 0.05

res = res %>% mutate(DE=ifelse(log2FoldChange >= log2(fc) & pvalue < pval, 'UP',
                               ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, 'DN','no_sig')))
res$DE = factor(res$DE, levels = c('UP','DN','no_sig'))

res %>% 
  ggplot(aes(log2FoldChange, -log10(pvalue))) + 
  geom_point(size=1, alpha=0.5) + 
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_classic() +
  geom_vline(xintercept = c(-log2(fc),log2(fc)), color='grey') +
  geom_hline(yintercept = -log10(0.05),color='grey') +
  xlim(c(-10,10)) +ylim(c(0,150))

Volcano plot (version2)

up = nrow(res[res$DE == "UP", ])
dn = nrow(res[res$DE == "DN", ])
res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(pval), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  xlim(c(-10,10)) +ylim(c(0,150)) +
  theme_bw() 

Volcano plot (version3)

res %>% ggplot(aes(log2FoldChange, -log10(pvalue), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() +
  xlim(c(-10,10)) +ylim(c(0,150)) + scale_y_sqrt()

DEG table

res %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
library(clusterProfiler)
hallmark <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, gene_symbol)

perform_GSEA <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$log2FoldChange
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  set.seed(123)
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  result <- x@result %>% arrange(desc(NES))
  result <- result[, c('NES', 'pvalue', 'p.adjust', 'core_enrichment', 'ID')]
  return(result)
}

# Application 
gsea.res = perform_GSEA(res = res, ref = hallmark) 

filtered_gsea = gsea.res %>% mutate(sig= ifelse(pvalue <= 0.05,"p value <= 0.05", "p value > 0.05"))

# Modified GSEA NES plot 
gsea_nes_plot =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=sig), color="grey1", size=0.2) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title= "GSEA") + 
    theme_classic() +
    # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
    scale_fill_manual(values = c("#FF0000","grey88")) +
    theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
          axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
          axis.title =element_text(size=10)) +ggtitle(title)
}

# Modified GSEA NES plot (p value color version) 
gsea_nes_plot_pval =function(gsea.res, title, fontsize.x = 5, fontsize.y = 6){
  gsea.res %>% ggplot(aes(reorder(ID, NES), NES)) +
  geom_col(aes(fill=-pvalue), color="grey1", size=0.2) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title= "GSEA") + 
  theme_classic() +
  # scale_fill_gradient(low = '#FF0000', high = '#E5E7E9') +
  scale_fill_gradient(low = "grey88",high ="#FF0000" ) +
  theme(axis.text.x= element_text(size=fontsize.x, face = 'bold'),
        axis.text.y= element_text(size=fontsize.y, face = 'bold'), 
        axis.title =element_text(size=10),
        legend.title = element_blank()) +ggtitle(title)
}

GSEA NES Plot 1

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot_pval(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

GSEA NES Plot 2

t= paste0("DEGs from ",tret, " / ", cont )

gsea_nes_plot(filtered_gsea, title = t, fontsize.x = 8, fontsize.y = 8)

res.all = list(res1=res1,res2=res2,res3=res3)
res.all %>% saveRDS(paste0(dir, "data/DMSO_3_comparison_res.all.rds"))

DEGs comparison

res.all = readRDS(paste0(dir, "data/DMSO_3_comparison_res.all.rds"))
degs = res.all
# Add DE information 
fc = 1.2
pval = 0.05
degs <- lapply(degs, function(df) {
  df %>% mutate(DE = ifelse(log2FoldChange >= log2(fc) & pvalue < pval, 'UP',
                            ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, 'DN', 'no_sig'))) %>%
    mutate(DE = factor(DE, levels = c('UP', 'DN', 'no_sig')))
})

UP-regulated genes

# 'UP'으로 표시된 유전자 이름을 추출하여 새로운 리스트 생성
up_genes <- lapply(degs, function(df) {
  df %>% filter(DE == "UP") %>% rownames()
})

# degs 리스트의 각 요소와 동일한 이름을 새 리스트에 할당
names(up_genes) <- names(degs)
ven_list = up_genes

## draw venn diagram
ven_out <- VennDetail::venndetail(ven_list)
plot(ven_out) 

plot(ven_out, type = "upset") 

ven_out@result %>% DT::datatable()

DN-regulated genes

# 'DN'으로 표시된 유전자 이름을 추출하여 새로운 리스트 생성
selected_genes <- lapply(degs, function(df) {
  df %>% filter(DE == "DN") %>% rownames()
})

# degs 리스트의 각 요소와 동일한 이름을 새 리스트에 할당
names(selected_genes) <- names(degs)
ven_list = selected_genes

## draw venn diagram
ven_out <- VennDetail::venndetail(ven_list)
plot(ven_out) 

plot(ven_out, type = "upset") 

ven_out@result %>% DT::datatable()